function Hcluster_bases(Dunit_base,saveinfo)
% HCLUSTER_BASES  Hierarchical displays illustrating patterns of gene
% expression within each base (46 bases in the human cell stress data).
%
% For every distinct base label found in Dunit_base(:,3) the function
%   1. extracts the expression rows of the genes assigned to that base,
%   2. imputes missing values (NaN) with the per-column median of that base,
%   3. orders the genes by average-linkage hierarchical clustering on
%      Euclidean distances,
%   4. draws the ordered matrix as a colour-coded image (colour axis [-1 1],
%      red-black-green colormap set by map_tricolor), and
%   5. saves the figure to disk and closes it.
%
% Inputs:
%   Dunit_base - numeric matrix, one row per gene:
%                  1st column:          original order of input data (sD.data);
%                  2nd column:          Best Matching Unit the gene belongs to;
%                  3rd column:          base the gene belongs to;
%                  4th to last columns: original input data (NaN = missing).
%   saveinfo   - cell array:
%                  saveinfo{1}: directory in which the figures are stored;
%                  saveinfo{2}: string appended verbatim to the file name
%                               (presumably the extension that selects the
%                               output format, e.g. '.tif' -- confirm with caller).
%
% Outputs:
%   None. One figure file per base is written to saveinfo{1}.
%
% Note on imputation: missing values are replaced by the column medians
% within each base. The rationale given by the original author is that,
% unlike KNN (K-nearest neighbours) imputation, no neighbour count has to be
% chosen: the relevant neighbourhood is already embodied in the Gaussian
% neighbourhood relations of the neurons and in the tightness of the gene
% clustering within each base.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

base_index=unique(Dunit_base(:,3));

% ---- Loop-invariant settings -------------------------------------------------
scrsz = get(0,'ScreenSize');

% The following layout is only suitable for the human cell stress data:
% index_stress holds the last column of each treatment group, index_names
% the label of that group.
index_stress=[8 15 22 31 40 51 59 67 76];
index_names={'HS_H','HS_F','HS_K','ER_HT','ER_HD','ER_FD','OS_HH','OS_HM','OS_FM'};
group_start=[0 index_stress(1:end-1)];
% Tick in the middle of every group (+0.5 because image cells are centred
% on integer coordinates).
tmp_xtick=(group_start+(index_stress-group_start)/2)+0.5;

% Normalised figure height reserved for one gene row.
unit_height=0.815/800;

% ---- One figure per base -----------------------------------------------------
for k=1:length(base_index)
    % Use the actual base label, not the loop counter: the labels in
    % Dunit_base(:,3) are not guaranteed to be exactly 1..N.
    base_id=base_index(k);

    figure('Position',[scrsz(3)*1/20 scrsz(3)*1/20 scrsz(3)*7/10*(0.815/0.775) scrsz(3)*7/10])
    set(gcf,'paperpositionmode','auto','color','w')

    flag=(Dunit_base(:,3)==base_id);                  % genes belonging to this base
    sub_Dunit_base=Dunit_base(flag,:);
    num_genes=size(sub_Dunit_base,1);                 % number of genes in this base
    num_neurons=size(unique(sub_Dunit_base(:,2)),1);  % number of neurons in this base
    subD=sub_Dunit_base(:,4:end);                     % gene expression matrix of this base

    % Imputation of missing values for this base
    tempD=subD;
    %% Find the missing values
    missingVals = isnan(tempD);
    %% Column medians ignoring NaN values. The dimension is given explicitly:
    %% for a base with a single gene (1-by-n matrix) the default would take
    %% the median along the row and return a scalar.
    colMedians = nanmedian(tempD,1);
    %% A column that is missing for every gene of the base has no median;
    %% fall back to 0, the centre of the [-1 1] colour axis, so that no NaN
    %% reaches the distance computation.
    colMedians(isnan(colMedians)) = 0;
    %% Replace the missing values with the column medians.
    colMed = repmat(colMedians,num_genes,1);
    tempD(missingVals) = colMed(missingVals);

    % Hierarchical clustering
    if num_genes>1
        y=pdist(tempD,'euclidean');  % Euclidean distance as (dis)similarity measure
        z=linkage(y,'average');      % hierarchical cluster tree, average linkage
        % dendrogram is only called to obtain the leaf order (0 = show all
        % leaves); its drawing is discarded immediately by clf.
        [h_dendro,t_dendro,ord]=dendrogram(z,0,'orientation','top');clf;
    else
        % A single gene cannot be clustered (pdist returns an empty vector).
        ord=1;
    end

    % Color-coded image of hierarchical output
    imagesc(tempD(ord,:)),caxis([-1 1]);
    map_tricolor('rbg') % set the figure's colormap as red-black-green scheme

    set(gca,'xtick',tmp_xtick,'xticklabel',index_names,'ytick',[])
    set(gca,'ticklength',[0 0],'xcolor','k','fontsize',8)
    title(['Base # ',num2str(base_id),'  Number of Neurons:',num2str(num_neurons),...
            '   Number of Genes:',num2str(num_genes)],'color','k','fontsize',10)

    % Axes and colorbar heights scale with the number of genes so that a
    % gene row has the same height in every figure.
    h_cbar=colorbar('vert');
    set(gca,'position',[0.13 0.925-unit_height*num_genes 0.75 unit_height*num_genes])
    set(h_cbar,'position',[0.9 0.925-unit_height*num_genes 0.01 unit_height*num_genes],'ytick',[-1 -0.5 0 0.5 1],'ycolor','k');

    % White vertical separators between treatment groups (none after the last).
    hold on
    for i=1:length(index_stress)-1
        x_sep=index_stress(i)+0.5;
        line([x_sep x_sep],[0 size(tempD,1)+0.5],'linestyle','-','linewidth',1,'color','w')
    end

    % fullfile inserts the platform's own file separator instead of a
    % hard-coded '\'.
    out_name=['Visualize Hierarchical_cluster of the ',num2str(base_id),' base',saveinfo{2}];
    saveas(gcf,fullfile(saveinfo{1},out_name))
    drawnow;
    close
end

return
